#ACTIVATE LIBRARIES
library(tidyverse)
library(boot)
library(igraph)
library(foreach)
library(doParallel)
library(rstudioapi)

#SET WORKING DIRECTORY
fileloc <- getSourceEditorContext()$path %>%
  str_remove("EUDR Replication.R")
setwd(fileloc)

#GLOBAL LAND COVER ACCURACY DATA
#READ IN DATA
glc <- read.csv("GlobalLCDatasetAccuracy.csv", stringsAsFactors = FALSE)

#FUNCTION TO COMPUTE FOREST FALSE POSITIVES PER NON-FOREST SAMPLE PIXEL
#FROM INFORMATION REPORTED IN LITERATURE
glc <- glc %>% mutate(ForestFPRisk = NA)

for(i in 1:nrow(glc)){
  if(!is.na(glc$Forest.Training.Points[i])){
    p <- glc$Forest.Producer.Accuracy[i]/100
    u <- glc$Forest.User.Accuracy[i]/100
    n <- glc$N[i]
    nf <- glc$Forest.Training.Points[i]
    numerator <- p * nf * (1 - u)
    denominator <- u * (n - nf)
    glc$ForestFPRisk[i] <- numerator/denominator*100
  }
}

#SET PLOTTING DEFAULTS
theme_opts <- theme(
  legend.title=element_text(size=14, face="bold"),
  plot.title=element_text(size=14, face="bold"),
  axis.title=element_text(size=14, face="bold"),
  axis.text=element_text(size=12),
  axis.text.x = element_text(angle = 315, vjust = 1, hjust=0),
  legend.text=element_text(size=12),
  strip.text = element_text(size=14, face="bold"),
  legend.position="bottom"
)

#PREPARE DATA
glc <- glc %>%
  mutate(Incorrectly.Classified = 100 - Overall.Accuracy,
         Forest.False.Positives = 100 - Forest.User.Accuracy,
         Forest.False.Negatives = 100 - Forest.Producer.Accuracy) %>%
  pivot_longer(cols=14:17) %>%
  mutate(Measure = str_replace_all(name, fixed("."), fixed(" ")))

glc$Measure[glc$Measure=="ForestFPRisk"] <- "Forest False Positives\nper Non-Forest Pixel"

glc$Dataset[glc$Dataset=="ESRI Living Atlas Land Cover"] <- "ESRI Living Atlas"

#CREATE GLOBAL ACCURACY PLOTS - FIGURE 2
fig1 <- ggplot(data=glc, aes(x=value))+
  geom_histogram(fill="white", bins = 5, linewidth=1, color="black")+
  scale_x_continuous("Percentage of Pixels")+
  scale_y_continuous("Number of Assessments")+
  facet_grid(Dataset~Measure, scales="free_y")+
  theme_opts
png("Figure2.png", width = 10, height = 10, units = "in", res=150)
fig1
dev.off()

#COMPUTING RISK OF FALSE POSITIVE FOR NON-COMPLIANCE DUE TO MISCLASSIFYING FOREST AS
#NON-FOREST - FIGURE 3
#CREATE FUNCTION TO COMPUTE RISKS
def_risk <- function(probs, sizes){
  conditions <- expand.grid(probs, sizes) %>%
    as_tibble()
  names(conditions) <- c("probs", "sizes")
  
  set.seed(3141)
  cl <- makeCluster(6)
  registerDoParallel(cl)
  
  risks <- foreach(i = 1:nrow(conditions), .combine="rbind") %dopar% {
    rbinom(n = 50000, prob = conditions$probs[i], size = conditions$sizes[i])
  } %>% as_tibble() %>%
    mutate(hectares = conditions$sizes/100) %>%
    pivot_longer(cols = 1:50000)
  
  return(risks)
  
}

#COMPUTE VALUES FOR FOREST FALSE NEGATIVE RISK
fn.risk <- def_risk(probs = glc$value[!is.na(glc$value)&(glc$Measure=="Forest False Negatives")]/100,
                    sizes = seq(50, 500, 50))

fn.risk.dist <- fn.risk %>%
  group_by(hectares) %>%
  reframe(pixels = quantile(value, probs = (1:100)/100),
          q = (-100:-1)/-100) %>%
  mutate(pixelsphect = pixels/as.numeric(hectares),
         hectares = factor(hectares, ordered=TRUE))

quantile(fn.risk.dist$pixelsphect)

fig3 <- ggplot(data=fn.risk.dist, aes(x = q*100, y = pixelsphect, color=hectares))+
  geom_line(linewidth=1.25)+
  scale_x_continuous("Percentage of Simulations Generating\nat Least as Many False Negative Forest Pixels per Hectare")+
  scale_y_continuous("False Negative Forest Pixels per Hectare")+
  scale_color_viridis_d("Hectares of\nForest on Plot", end=0.8)+
  theme_opts

png("Figure3.png", width = 8, height = 6, units="in", res=150)
fig3
dev.off()

#SIMULATIONS TO ESTIMATE THE RISK OF FALSE POSITIVE FOR NON-COMPLIANCE DUE TO 
#MISCLASSIFYING COFFEE AS FOREST IN BASELINE YEAR

#SIMULATE EU FOREST DEFINITION FALSE POSITIVE RISK - FIGURE 3
#CREATE FUNCTION TO MAKE LATTICE BASED ON SIZE
make_lattice_hect <- function(hectares){
  pixels <- hectares * 100
  lat <- make_lattice(length = sqrt(pixels), dim = 2)
  return(lat)
}

#CREATE FUNCTION TO IDENTIFY TOTAL PIXELS IN CONTIGUOUS MISCLASSIFIED AREAS
#LARGE ENOUGH TO QUALIFY AS FOREST UNDER EU DEFINITION - NO FOREST BOUNDARY
contiguous_misclassified <- function(lat, prob){
  prob <- prob[[sample(1:length(prob), 1)]]
  V(lat)$misclassified <- rbinom(n=vcount(lat), size=1, prob = prob)
  lat <- subgraph(lat, which(V(lat)$misclassified==1))
  components <- clusters(lat, mode="weak")$csize
  return(sum(components[components>=50]))
}

#CREATE FUNCTION TO IDENTIFY TOTAL PIXELS IN CONTIGUOUS MISCLASSIFIED AREAS
#LARGE ENOUGH TO QUALIFY AS FOREST UNDER EU DEFINITION - ONE FOREST BOUNDARY
contiguous_misclassified_oneforest <- function(lat, prob){
  prob <- prob[[sample(1:length(prob), 1)]]
  V(lat)$misclassified <- rbinom(n=vcount(lat), size=1, prob = prob)
  lat <- subgraph(lat, which(V(lat)$misclassified==1))
  mems <- clusters(lat, mode="weak")$membership
  row1 <- vcount(lat) %>% sqrt()
  bordermems <- unique(mems[1:row1])
  mems <- table(mems)
  return(sum(mems[(mems>=50)|(names(mems)%in%bordermems)]))
}

#SIMULATE RISK OF FALSE DETECTION - NO FOREST BORDER
#GLOBAL FOREST FALSE POSITIVES PER NON-FOREST PROBABILITIES
hectares <- 1:10
glcprobs <- glc$value[glc$Measure=="Forest False Positives"]
glcprobs <- glcprobs[!is.na(glcprobs)]
noforest <- foreach(hectares=hectares, .combine = "cbind", .packages = "igraph") %dopar% {
  lat <- make_lattice_hect(hectares)
  out <- replicate(n = 50000, 
                   expr = contiguous_misclassified(lat=lat, prob=glcprobs/100),
                   simplify = TRUE)
  out
}

#SIMULATE RISK OF FALSE DETECTION - ONE FOREST BORDER
#CONFUSION MATRIX PROBABILITIES
#GLOBAL FOREST FALSE POSITIVE PROBABILITIES
glcprobs <- glc$value[glc$Measure=="Forest False Positives\nper Non-Forest Pixel"]
glcprobs <- glcprobs[!is.na(glcprobs)]
oneforest <- foreach(hectares=hectares, .combine = "cbind", .packages = "igraph") %dopar% {
  lat <- make_lattice_hect(hectares)
  out <- replicate(n = 50000, 
                   expr = contiguous_misclassified_oneforest(lat=lat, prob=glcprobs/100),
                   simplify = TRUE)
  out
}

#PLOT EXPECTED DISTRIBUTION OF NONCOMPLIANT PIXELS - FIGURE 4
oneforest <- oneforest %>%
    as_tibble() %>%
    mutate(border = "Forest Bordering on One Side")
noforest <- noforest %>%
    as_tibble() %>%
    mutate(border = "No Adjacent Forest")

pixels.dist <- rbind(oneforest,
                      noforest) %>%
  pivot_longer(cols = 1:10) %>%
  mutate(hectares = factor(str_remove(name, fixed("result.")), levels = 1:10, ordered = TRUE)) %>%
  group_by(hectares, border) %>%
  reframe(pixels = quantile(value, probs = (1:100)/100),
          q = (-100:-1)/-100) %>%
  mutate(pixelsphect = pixels/as.numeric(hectares),
         hectares = factor(hectares, ordered=TRUE))

fig4 <- ggplot(data=pixels.dist, aes(x = q*100, y = pixelsphect, color=hectares))+
  geom_line(linewidth=0.75)+
  scale_x_continuous("Percentage of Simulations Generating at\nLeast as Many False Positive Forest Pixels per Hectare")+
  scale_y_continuous("False Positive Forest Pixels per Hectare")+
  scale_color_viridis_d("Hectares", end=0.8)+
  facet_wrap(~border, ncol=1, scales="free_y")+
  theme_opts
png("Figure4.png", width = 8, height = 6, units = "in", res=150)
fig4
dev.off()